Load preprocessed dataset (preprocessing code in data_preprocessing.Rmd)

glue('Number of genes: ', nrow(datExpr), '\n',
     'Number of samples: ', ncol(datExpr), ' (', sum(datMeta$Diagnosis_=='ASD'), ' ASD, ',
     sum(datMeta$Diagnosis_!='ASD'), ' controls)')
## Number of genes: 30109
## Number of samples: 80 (45 ASD, 35 controls)

Keep only differentially expressed genes

5838 genes don’t have an adjusted p-value because they have less mean normalized counts than the optimal threshold link, so they are going to be considered not to be significant

plot_data = data.frame('id'=rownames(datExpr), 'mean_expression' = rowMeans(datExpr), 
                       'SFARI_score'=DE_info$`gene-score`!='None',
                       'DExpressed'=DE_info$padj<0.05)
ggplotly(plot_data %>% ggplot(aes(x=mean_expression, fill=DExpressed, color=DExpressed)) + geom_density(alpha=0.3) + 
         scale_x_log10() + ggtitle('gene Mean Expression distribution') + theme_minimal())

We lose almost all of the genes with SFARI score

plot_data_SFARI = plot_data %>% filter(SFARI_score)
ggplotly(plot_data_SFARI %>% ggplot(aes(x=mean_expression, fill=DExpressed, color=DExpressed)) + geom_density(alpha=0.3) + 
              ggtitle('gene Mean Expression distribution for SFARI Genes') + scale_y_sqrt() + theme_minimal())
table(plot_data$DExpressed[plot_data$SFARI_score], useNA='ifany')
## 
## FALSE  TRUE  <NA> 
##   715   151    19
print(paste0('Losing ', round(100*(1-mean(plot_data$DExpressed[plot_data$SFARI_score==TRUE], na.rm=T)),1), 
             '% of the genes with SFARI score'))
## [1] "Losing 82.6% of the genes with SFARI score"
datExpr = datExpr[plot_data$DExpressed & !is.na(plot_data$DExpressed),]
DE_info = DE_info[plot_data$DExpressed & !is.na(plot_data$DExpressed),]
datGenes = datGenes[plot_data$DExpressed & !is.na(plot_data$DExpressed),]

rm(plot_data, plot_data_SFARI)

Dimensionality reduction using PCA

To make calculations more efficient for the more time consuming methods, we can perform PCA and keep the first 16 principal components which explain over 99.5% of the total variance

pca = prcomp(datExpr)
datExpr_redDim = pca$x %>% data.frame %>% dplyr::select(PC1:PC16)

Clustering Methods

clusterings = list()

clusterings[['SFARI_score']] = DE_info$`gene-score`
names(clusterings[['SFARI_score']]) = rownames(DE_info)

clusterings[['SFARI_bool']] = DE_info$`gene-score`!='None'
names(clusterings[['SFARI_bool']]) = rownames(DE_info)

clusterings[['syndromic']] = DE_info$syndromic==1
names(clusterings[['syndromic']]) = rownames(DE_info)



K-means clustering

set.seed(123)
wss = sapply(1:10, function(k) kmeans(datExpr, k, iter.max=200, nstart=25,
                                      algorithm='MacQueen')$tot.withinss)
plot(wss, type='b', main='K-Means Clustering')
best_k = 4
abline(v = best_k, col='blue')

datExpr_k_means = kmeans(datExpr, best_k, iter.max=100, nstart=25)
clusterings[['km']] = datExpr_k_means$cluster



Hierarchical Clustering

Chose k=9 as best number of clusters. SFARI genes seem to group in the last two clusters

h_clusts = datExpr %>% dist %>% hclust
plot(h_clusts, hang = -1, cex = 0.6, labels=FALSE)
abline(h=19, col='blue')

best_k = 10

SFARI and Neuronal related genes seem to concentrate mainly in the aqua, blue and pink clusters

clusterings[['hc']] = cutree(h_clusts, best_k)

create_viridis_dict = function(){
  min_score = clusterings[['SFARI_score']] %>% as.numeric %>% min(na.rm=TRUE)
  max_score = clusterings[['SFARI_score']] %>% as.numeric %>% max(na.rm=TRUE)
  viridis_score_cols = viridis(max_score - min_score + 1)
  names(viridis_score_cols) = seq(min_score, max_score)
  
  return(viridis_score_cols)
}

viridis_score_cols = create_viridis_dict()

dend_meta = DE_info[match(labels(h_clusts), DE_info$ID),] %>% 
            mutate('SFARI_score' = viridis_score_cols[`gene-score`],                     # Purple: 2, Yellow: 6
                   'SFARI_bool' = ifelse(`gene-score` != 'None', '#21908CFF', 'white'),  # Acqua
                   'Syndromic' = ifelse(syndromic == T, 'orange', 'white'),
                   'Neuronal' = ifelse(ID %in% GO_neuronal$ID, '#666666','white')) %>% 
            dplyr::select(SFARI_score, SFARI_bool, Syndromic, Neuronal)

h_clusts %>% as.dendrogram %>% set('labels', rep('', nrow(datMeta))) %>% 
             set('branches_k_color', k=best_k) %>% plot
colored_bars(colors=dend_meta)



Consensus Clustering

Samples are grouped into two big clusters, and then many outliers.


*The rest of the output plots can be found in the Data/Gandal/consensusClustering/genes_DE/ folder

Independent Component Analysis

Following this paper’s guidelines:

  1. Run PCA and keep enough components to explain 60% of the variance (keeping 99.5% of the variance)

  2. Run ICA with that same number of nbComp as principal components kept to then filter them

  3. Select components with kurtosis > 3

  4. Assign genes to clusters with FDR<0.01 using the fdrtool package

ICA_output = datExpr_redDim %>% runICA(nbComp=ncol(datExpr_redDim), method='JADE')
signals_w_kurtosis = ICA_output$S %>% data.frame %>% dplyr::select(names(apply(ICA_output$S, 2, kurtosis)>3))
ICA_clusters = apply(signals_w_kurtosis, 2, function(x) fdrtool(x, plot=F, verbose=F)$qval<0.01) %>% data.frame
ICA_clusters = ICA_clusters[colSums(ICA_clusters)>1]

clusterings[['ICA_min']] = rep(NA, nrow(ICA_clusters))
ICA_clusters_names = colnames(ICA_clusters)
for(c in ICA_clusters_names) clusterings[['ICA_min']][ICA_clusters[,c]] = c

clusterings[['ICA_NA']] = is.na(clusterings[['ICA_min']])

Leaves 76% of the genes without a cluster

ICA_clusters %>% rowSums %>% table
## .
##    0    1    2    3    4    5    6    7    8    9 
## 2297  336  168   91   45   36   14    8    4    1
# ICA_clusters %>% mutate(cl_sum=rowSums(.)) %>% as.matrix %>% melt %>% ggplot(aes(Var2, Var1)) + 
#   geom_tile(aes(fill=value)) + xlab('Clusters') + ylab('Samples') + 
#   theme_minimal() + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + coord_flip()

Trying again these time with all of the principal components and 40 clusters

ICA_output = pca$x %>% data.frame %>% runICA(nbComp=40, method='JADE')
signals_w_kurtosis = ICA_output$S %>% data.frame %>% dplyr::select(names(apply(ICA_output$S, 2, kurtosis)>3))
ICA_clusters = apply(signals_w_kurtosis, 2, function(x) fdrtool(x, plot=F, verbose=F)$qval<0.01) %>% data.frame
ICA_clusters = ICA_clusters[colSums(ICA_clusters)>1]

Doesn’t make a big difference (69%), but it’s still better

ICA_clusters %>% rowSums %>% table
## .
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13 
## 2074  283  188  135   95   63   49   43   30   19    6    8    4    3
clusterings[['ICA_min']] = rep(NA, nrow(ICA_clusters))
ICA_clusters_names = colnames(ICA_clusters)
for(c in ICA_clusters_names) clusterings[['ICA_min']][ICA_clusters[,c]] = c

clusterings[['ICA_NA']] = is.na(clusterings[['ICA_min']])

# ICA_clusters %>% mutate(cl_sum=rowSums(.)) %>% as.matrix %>% melt %>% ggplot(aes(Var2, Var1)) + 
#   geom_tile(aes(fill=value)) + xlab('Clusters') + ylab('Samples') + 
#   theme_minimal() + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + coord_flip()



WGCNA

Note: This method does not work with the reduced version of datExpr.

best_power = datExpr %>% t %>% pickSoftThreshold(powerVector = seq(1, 30, by=2))
##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k.   max.k.
## 1      1  0.00342  0.261          0.988 7.75e+02  7.70e+02 1330.000
## 2      3  0.58100 -2.220          0.963 9.82e+01  9.03e+01  331.000
## 3      5  0.83600 -2.560          0.990 1.92e+01  1.58e+01  110.000
## 4      7  0.90000 -2.520          0.994 4.96e+00  3.49e+00   43.500
## 5      9  0.88700 -2.380          0.951 1.59e+00  9.14e-01   19.700
## 6     11  0.38500 -3.210          0.349 6.03e-01  2.66e-01    9.780
## 7     13  0.95700 -1.830          0.985 2.62e-01  8.50e-02    5.250
## 8     15  0.94200 -1.780          0.964 1.26e-01  2.95e-02    3.360
## 9     17  0.93400 -1.760          0.974 6.67e-02  1.08e-02    2.450
## 10    19  0.94800 -1.700          0.985 3.77e-02  4.11e-03    1.830
## 11    21  0.39700 -2.280          0.354 2.25e-02  1.65e-03    1.390
## 12    23  0.40700 -2.190          0.367 1.41e-02  6.55e-04    1.070
## 13    25  0.41200 -2.110          0.371 9.23e-03  2.72e-04    0.829
## 14    27  0.41400 -2.040          0.361 6.23e-03  1.15e-04    0.651
## 15    29  0.41300 -1.980          0.363 4.33e-03  4.94e-05    0.515
network = datExpr %>% t %>% blockwiseModules(power=best_power$powerEstimate, numericLabels=T)
clusterings[['WGCNA']] = network$colors
names(clusterings[['WGCNA']]) = rownames(datExpr)

It leaves 1211 genes without a cluster (~40%)

clusterings[['WGCNA']] %>% table
## .
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 1211  449  203  158  128  112  106  100   80   76   71   65   40   36   36 
##   15   16   17   18   19 
##   31   26   25   25   22



Gaussian Mixture Models with hard thresholding

The BIC decreases monotonically, but it seems to stabilise at bit at 14

n_clust = datExpr %>% Optimal_Clusters_GMM(max_clusters=40, criterion='BIC', plot_data=FALSE)
plot(n_clust, type='l', main='Bayesian Information Criterion to choose number of clusters')
abline(v=14, col='blue')

best_k = 14
gmm = datExpr %>% GMM(best_k)
clusterings[['GMM']] = gmm$Log_likelihood %>% apply(1, which.max)



Manual Clustering

Separating the two clouds of points into two clusters

intercept=-0.2
slope=0.02
manual_clusters = as.factor(as.numeric(slope*datExpr_redDim$PC1 + intercept > datExpr_redDim$PC2))
names(manual_clusters) = rownames(datExpr_redDim)
clusterings[['Manual']] = manual_clusters

datExpr_redDim %>% ggplot(aes(PC1, PC2, color=manual_clusters)) + geom_point(alpha=0.3) + 
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) +
              geom_abline(intercept=intercept, slope=slope, color='gray') +
              theme_minimal() + ggtitle('PCA')

clusterings[['Manual']] %>% table
## .
##    0    1 
## 1382 1618
rm(intercept, slope, pca)

Both the aqua and the salmon clusters seem to be componsed of three Gaussians in the Mean and SD plots.

manual_clusters_data = cbind(apply(datExpr_redDim, 1, mean), apply(datExpr_redDim, 1, sd), 
                             manual_clusters) %>% data.frame
colnames(manual_clusters_data) = c('mean','sd','cluster')
manual_clusters_data = manual_clusters_data %>% mutate('cluster'=as.factor(cluster))
p1 = manual_clusters_data %>% ggplot(aes(x=mean, color=cluster, fill=cluster)) + 
  geom_density(alpha=0.4) + theme_minimal()
p2 = manual_clusters_data %>% ggplot(aes(x=sd, color=cluster, fill=cluster)) + 
  geom_density(alpha=0.4) + theme_minimal()
grid.arrange(p1, p2, ncol=2)

Separate genes into four and two Gaussians, respectively by their mean expression:

gg_colour_hue = function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

c1_mean = manual_clusters_data %>% filter(cluster==1) %>% dplyr::select(mean)
rownames(c1_mean) = rownames(manual_clusters_data)[manual_clusters_data$cluster=='1']
gmm_c1_mean = c1_mean %>% GMM(3)

c2_mean = manual_clusters_data %>% filter(cluster==2) %>% dplyr::select(mean)
rownames(c2_mean) = rownames(manual_clusters_data)[manual_clusters_data$cluster=='2']
gmm_c2_mean = c2_mean %>% GMM(3)

clusterings[['Manual_mean']] = as.character(clusterings[['Manual']])
clusterings[['Manual_mean']][clusterings[['Manual']]==0] = gmm_c1_mean$Log_likelihood %>% 
  apply(1, function(x) glue('1_',which.max(x)))
clusterings[['Manual_mean']][clusterings[['Manual']]==1] = gmm_c2_mean$Log_likelihood %>% 
  apply(1, function(x) glue('2_',which.max(x)))


plot_gaussians = manual_clusters_data %>% ggplot(aes(x=mean)) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[1],
                args=list(mean=gmm_c1_mean$centroids[1], sd=gmm_c1_mean$covariance_matrices[1])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[2],
                args=list(mean=gmm_c1_mean$centroids[2], sd=gmm_c1_mean$covariance_matrices[2])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[3],
                args=list(mean=gmm_c1_mean$centroids[3], sd=gmm_c1_mean$covariance_matrices[3])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[4],
                args=list(mean=gmm_c2_mean$centroids[1], sd=gmm_c2_mean$covariance_matrices[1])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[5],
                args=list(mean=gmm_c2_mean$centroids[2], sd=gmm_c2_mean$covariance_matrices[2])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(6)[6],
                args=list(mean=gmm_c2_mean$centroids[3], sd=gmm_c2_mean$covariance_matrices[3])) +
  theme_minimal()

plot_points = datExpr_redDim %>% ggplot(aes_string(x='PC1', y='PC2', color=as.factor(clusterings[['Manual_mean']]))) + 
          geom_point() + theme_minimal()

grid.arrange(plot_gaussians, plot_points, ncol=2)

clusterings[['Manual_mean']] %>% table
## .
## 1_1 1_2 1_3 2_1 2_2 2_3 
## 491 321 570 337 595 686

Separate clusters into three Gaussians per diagnosis by their sd:

n_clusters = 3

c1_sd = manual_clusters_data %>% filter(cluster==1) %>% dplyr::select(sd)
rownames(c1_sd) = rownames(manual_clusters_data)[manual_clusters_data$cluster=='1']
gmm_c1_sd = c1_sd %>% GMM(n_clusters)

c2_sd = manual_clusters_data %>% filter(cluster==2) %>% dplyr::select(sd)
rownames(c2_sd) = rownames(manual_clusters_data)[manual_clusters_data$cluster=='2']
gmm_c2_sd = c2_sd %>% GMM(n_clusters)

clusterings[['Manual_sd']] = as.character(clusterings[['Manual']])
clusterings[['Manual_sd']][clusterings[['Manual']]==0] = gmm_c1_sd$Log_likelihood %>% 
  apply(1, function(x) glue('1_',which.max(x)))
clusterings[['Manual_sd']][clusterings[['Manual']]==1] = gmm_c2_sd$Log_likelihood %>% 
  apply(1, function(x) glue('2_',which.max(x)))


plot_gaussians = manual_clusters_data %>% ggplot(aes(x=sd)) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[1],
                args=list(mean=gmm_c1_sd$centroids[1], sd=gmm_c1_sd$covariance_matrices[1])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[2], 
                args=list(mean=gmm_c1_sd$centroids[2], sd=gmm_c1_sd$covariance_matrices[2])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[3], 
                args=list(mean=gmm_c1_sd$centroids[3], sd=gmm_c1_sd$covariance_matrices[3])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[4],
                args=list(mean=gmm_c2_sd$centroids[1], sd=gmm_c2_sd$covariance_matrices[1])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[5],
                args=list(mean=gmm_c2_sd$centroids[2], sd=gmm_c2_sd$covariance_matrices[2])) +
  stat_function(fun=dnorm, n=100, colour=gg_colour_hue(2*n_clusters)[6],
                args=list(mean=gmm_c2_sd$centroids[3], sd=gmm_c2_sd$covariance_matrices[3])) +
  theme_minimal()


plot_points = datExpr_redDim %>% ggplot(aes_string(x='PC1', y='PC2', color=as.factor(clusterings[['Manual_sd']]))) + 
          geom_point() + theme_minimal()

grid.arrange(plot_gaussians, plot_points, ncol=2)

clusterings[['Manual_sd']] %>% table
## .
## 1_1 1_2 1_3 2_1 2_2 2_3 
## 461 337 584 452 603 563
rm(c1_sd, c2_sd, gmm_c1_sd, gmm_c2_sd)
# Clean up the environment a bit
rm(wss, datExpr_k_means, h_clusts, cc_output, best_k, ICA_output, ICA_clusters_names, signals_w_kurtosis, 
   n_clust, gmm, network, best_power, c, manual_clusters, manual_clusters_data, c1_mean, c2_mean, 
   gmm_c1_mean, gmm_c2_mean, p1, p2, dend_meta, plot_gaussians, plot_points, n_clusters, viridis_score_cols, 
   gg_colour_hue, create_viridis_dict)

Compare clusterings

Using Adjusted Rand Index:

cluster_sim = data.frame(matrix(nrow = length(clusterings), ncol = length(clusterings)))
for(i in 1:(length(clusterings))){
  cluster1 = as.factor(clusterings[[i]])
  for(j in (i):length(clusterings)){
    cluster2 = as.factor(clusterings[[j]])
    cluster_sim[i,j] = adj.rand.index(cluster1, cluster2)
  }
}
colnames(cluster_sim) = names(clusterings)
rownames(cluster_sim) = colnames(cluster_sim)

cluster_sim = cluster_sim %>% as.matrix %>% round(2)
heatmap.2(x = cluster_sim, Rowv = FALSE, Colv = FALSE, dendrogram = 'none', 
          cellnote = cluster_sim, notecol = 'black', trace = 'none', key = FALSE, 
          cexRow = 1, cexCol = 1, margins = c(7,7))

rm(i, j, cluster1, cluster2, cluster_sim)

Scatter plots

  • The simple clustering methods only consider the 1st component, dividing by vertical lines

  • GMM does vertical clusters when using the complete expression matrix but round, small clusters when using the reduced version

  • WGCNA clusters don’t seem to have a strong relation with the first principal components

  • SFARI genes seem to be everywhere (perhaps a bit more concentrated on the right side of the plot)

  • 1st PC seems to reflect the average level of expression of the genes

  • There seems to be a change in behaviour around PC1=0 (CC)

plot_points = datExpr_redDim %>% data.frame() %>% dplyr::select(PC1:PC3) %>%
              mutate(ID = rownames(.),                                 k_means = as.factor(clusterings[['km']]),
                hc = as.factor(clusterings[['hc']]),                   cc = as.factor(clusterings[['cc_l1']]),
                ica = as.factor(clusterings[['ICA_min']]),
                n_ica = as.factor(rowSums(ICA_clusters)),              gmm = as.factor(clusterings[['GMM']]),
                wgcna = as.factor(clusterings[['WGCNA']]),             manual = as.factor(clusterings[['Manual']]),
                manual_mean = as.factor(clusterings[['Manual_mean']]), manual_sd = as.factor(clusterings[['Manual_sd']]),   
                SFARI = as.factor(clusterings[['SFARI_score']]),       SFARI_bool = as.factor(clusterings[['SFARI_bool']]),
                syndromic = as.factor(clusterings[['syndromic']])) %>%
              bind_cols(DE_info[DE_info$ID %in% rownames(datExpr_redDim),]) %>% 
              mutate(avg_expr = log2(rowMeans(datExpr)+1)[rownames(datExpr) %in% rownames(datExpr_redDim)])
rownames(plot_points) = plot_points$ID
selectable_scatter_plot(plot_points, plot_points)




Save clusterings

clusterings_file = './../Data/Gandal/clusterings.csv'

if(file.exists(clusterings_file)){

  df = read.csv(clusterings_file, row.names=1)
  
  if(!all(rownames(df) == rownames(datExpr))) stop('Gene ordering does not match the one in clusterings.csv!')
  
  for(clustering in names(clusterings)){
    df = df %>% mutate(!!clustering := sub(0, NA, clusterings[[clustering]]))
    rownames(df) = rownames(datExpr)
  }
  
} else {
  
  df = clusterings %>% unlist %>% matrix(nrow=length(clusterings), byrow=T) %>% t %>% data.frame %>% na_if(0)
  colnames(df) = names(clusterings)
  rownames(df) = rownames(datExpr)

}

write.csv(df, file=clusterings_file)

rm(clusterings_file, df, clustering)

Session info

sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] dendextend_1.10.0           gplots_3.0.1               
##  [3] pdfCluster_1.0-3            WGCNA_1.66                 
##  [5] fastcluster_1.1.25          dynamicTreeCut_1.63-1      
##  [7] ClusterR_1.1.8              fdrtool_1.2.15             
##  [9] moments_0.14                MineICA_1.22.0             
## [11] fastICA_1.2-1               Hmisc_4.1-1                
## [13] Formula_1.2-3               survival_2.43-3            
## [15] lattice_0.20-38             annotate_1.60.1            
## [17] XML_3.98-1.11               Rgraphviz_2.26.0           
## [19] igraph_1.2.4                colorspace_1.4-1           
## [21] mclust_5.4                  marray_1.60.0              
## [23] limma_3.38.3                cluster_2.0.7-1            
## [25] GOstats_2.48.0              graph_1.60.0               
## [27] Category_2.48.1             Matrix_1.2-15              
## [29] AnnotationDbi_1.44.0        IRanges_2.16.0             
## [31] S4Vectors_0.20.1            gtools_3.5.0               
## [33] biomaRt_2.38.0              xtable_1.8-3               
## [35] foreach_1.4.4               scales_1.0.0               
## [37] plyr_1.8.4                  Biobase_2.42.0             
## [39] BiocGenerics_0.28.0         JADE_2.0-1                 
## [41] ConsensusClusterPlus_1.46.0 GGally_1.4.0               
## [43] gridExtra_2.3               viridis_0.5.1              
## [45] viridisLite_0.3.0           RColorBrewer_1.1-2         
## [47] plotlyutils_0.0.0.9000      plotly_4.8.0               
## [49] glue_1.3.1                  reshape2_1.4.3             
## [51] forcats_0.3.0               stringr_1.4.0              
## [53] dplyr_0.8.0.1               purrr_0.3.1                
## [55] readr_1.3.1                 tidyr_0.8.3                
## [57] tibble_2.1.1                ggplot2_3.1.0              
## [59] tidyverse_1.2.1            
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_0.2.5            robust_0.4-18              
##   [3] RSQLite_2.1.1               htmlwidgets_1.3            
##   [5] trimcluster_0.1-2.1         BiocParallel_1.16.6        
##   [7] gmp_0.5-13.5                munsell_0.5.0              
##   [9] codetools_0.2-15            preprocessCore_1.44.0      
##  [11] withr_2.1.2                 knitr_1.22                 
##  [13] rstudioapi_0.7              geometry_0.4.0             
##  [15] robustbase_0.93-0           labeling_0.3               
##  [17] FD_1.0-12                   GenomeInfoDbData_1.2.0     
##  [19] bit64_0.9-7                 generics_0.0.2             
##  [21] xfun_0.5                    diptest_0.75-7             
##  [23] R6_2.4.0                    doParallel_1.0.14          
##  [25] GenomeInfoDb_1.18.2         clue_0.3-57                
##  [27] locfit_1.5-9.1              flexmix_2.3-15             
##  [29] bitops_1.0-6                reshape_0.8.7              
##  [31] DelayedArray_0.8.0          assertthat_0.2.1           
##  [33] promises_1.0.1              nnet_7.3-12                
##  [35] gtable_0.2.0                Cairo_1.5-9                
##  [37] rlang_0.3.2                 genefilter_1.64.0          
##  [39] splines_3.5.2               lazyeval_0.2.2             
##  [41] acepack_1.4.1               impute_1.56.0              
##  [43] broom_0.5.1                 checkmate_1.8.5            
##  [45] yaml_2.2.0                  abind_1.4-5                
##  [47] modelr_0.1.4                crosstalk_1.0.0            
##  [49] backports_1.1.2             httpuv_1.5.0               
##  [51] RBGL_1.58.2                 tools_3.5.2                
##  [53] Rcpp_1.0.1                  base64enc_0.1-3            
##  [55] progress_1.2.0              zlibbioc_1.28.0            
##  [57] RCurl_1.95-4.10             prettyunits_1.0.2          
##  [59] rpart_4.1-13                SummarizedExperiment_1.12.0
##  [61] haven_1.1.1                 magrittr_1.5               
##  [63] data.table_1.12.0           mvtnorm_1.0-7              
##  [65] whisker_0.3-2               matrixStats_0.54.0         
##  [67] mime_0.6                    hms_0.4.2                  
##  [69] evaluate_0.13               readxl_1.1.0               
##  [71] compiler_3.5.2              KernSmooth_2.23-15         
##  [73] crayon_1.3.4                htmltools_0.3.6            
##  [75] later_0.8.0                 mgcv_1.8-26                
##  [77] pcaPP_1.9-73                geneplotter_1.60.0         
##  [79] rrcov_1.4-3                 lubridate_1.7.4            
##  [81] DBI_1.0.0                   magic_1.5-9                
##  [83] MASS_7.3-51.1               fpc_2.1-11.1               
##  [85] ade4_1.7-11                 permute_0.9-4              
##  [87] cli_1.1.0                   gdata_2.18.0               
##  [89] GenomicRanges_1.34.0        pkgconfig_2.0.2            
##  [91] fit.models_0.5-14           foreign_0.8-71             
##  [93] xml2_1.2.0                  XVector_0.22.0             
##  [95] AnnotationForge_1.24.0      rvest_0.3.2                
##  [97] digest_0.6.18               vegan_2.5-2                
##  [99] rmarkdown_1.12              cellranger_1.1.0           
## [101] htmlTable_1.11.2            GSEABase_1.44.0            
## [103] kernlab_0.9-27              shiny_1.2.0                
## [105] modeltools_0.2-22           nlme_3.1-137               
## [107] jsonlite_1.6                pillar_1.3.1               
## [109] httr_1.4.0                  DEoptimR_1.0-8             
## [111] GO.db_3.7.0                 prabclus_2.2-7             
## [113] iterators_1.0.9             bit_1.1-14                 
## [115] class_7.3-14                stringi_1.4.3              
## [117] blob_1.1.1                  DESeq2_1.22.2              
## [119] latticeExtra_0.6-28         caTools_1.17.1             
## [121] memoise_1.1.0               ape_5.3